Download monthly data from January 2015M1 to 2022M10 to estimate the followings:

  1. Select at least five stocks from different industries (for the list of the firms in different industries see, https://biz.yahoo.com/p/sum_conameu.html).

First, we will import necessary libraries to chose stocks to invest in by comparing risk & returns. If there are stocks that offers lower returns with higher risk, we are going to exclude those stocks from the portfolio.

#Import Libraries
library(tidyverse, quietly = TRUE) 
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.8
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(quantmod)
## 필요한 패키지를 로딩중입니다: xts
## 필요한 패키지를 로딩중입니다: zoo
## 
## 다음의 패키지를 부착합니다: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## 다음의 패키지를 부착합니다: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 필요한 패키지를 로딩중입니다: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(tidyquant)
## 필요한 패키지를 로딩중입니다: lubridate
## 
## 다음의 패키지를 부착합니다: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
## 필요한 패키지를 로딩중입니다: PerformanceAnalytics
## 
## 다음의 패키지를 부착합니다: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
## == Need to Learn tidyquant? ====================================================
## Business Science offers a 1-hour course - Learning Lab #9: Performance Analysis & Portfolio Optimization with tidyquant!
## </> Learn more at: https://university.business-science.io/p/learning-labs-pro </>
library(PortfolioAnalytics)
## 필요한 패키지를 로딩중입니다: foreach
## 
## 다음의 패키지를 부착합니다: 'foreach'
## The following objects are masked from 'package:purrr':
## 
##     accumulate, when
library(PerformanceAnalytics)
library(timetk)
library(corrplot)
## corrplot 0.92 loaded
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library(plotly)
## 
## 다음의 패키지를 부착합니다: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(DT)
library(Metrics)
library(forecast)
## 
## 다음의 패키지를 부착합니다: 'forecast'
## The following object is masked from 'package:Metrics':
## 
##     accuracy
#Stock selection process
#Analyze multiple stocks
#Create symbols
symbols.a <- c("AMD","NVDA","MSFT","KO","PEP","AMZN","AAPL","ET","NFLX","NKE",
             "DIS","WMT","ADBE","SPY")

#Download stock data using tidyquant
stocks.prices.a <-  tq_get(symbols.a,get  = "stock.prices",from = "2015-01-01",
                           to = "2019-12-31") %>%
  group_by(symbol)

stocks.prices.a
## # A tibble: 17,598 x 8
## # Groups:   symbol [14]
##    symbol date        open  high   low close   volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
##  1 AMD    2015-01-02  2.67  2.67  2.67  2.67        0     2.67
##  2 AMD    2015-01-05  2.67  2.7   2.64  2.66  8878200     2.66
##  3 AMD    2015-01-06  2.65  2.66  2.55  2.63 13912500     2.63
##  4 AMD    2015-01-07  2.63  2.65  2.54  2.58 12377600     2.58
##  5 AMD    2015-01-08  2.59  2.65  2.56  2.61 11136600     2.61
##  6 AMD    2015-01-09  2.63  2.64  2.58  2.63  8907600     2.63
##  7 AMD    2015-01-12  2.62  2.64  2.55  2.63  9979600     2.63
##  8 AMD    2015-01-13  2.64  2.68  2.6   2.66 17907400     2.66
##  9 AMD    2015-01-14  2.6   2.66  2.58  2.63  9989900     2.63
## 10 AMD    2015-01-15  2.62  2.65  2.49  2.52 17744000     2.52
## # ... with 17,588 more rows

Tidyquant is a package that imports stock prices from yahoo finance in a tibble format, which helps visualizing the data more easily by using ggplot2 package.

#Compute monthly returns of the multiple stocks
multpl_stock_monthly_returns.a <- stocks.prices.a %>%
  group_by(symbol) %>%
  tq_transmute(select = adjusted,
               mutate_fun = periodReturn,
               period = 'monthly',
               col_rename = 'returns')
multpl_stock_monthly_returns.a
## # A tibble: 840 x 3
## # Groups:   symbol [14]
##    symbol date        returns
##    <chr>  <date>        <dbl>
##  1 AMD    2015-01-30 -0.0375 
##  2 AMD    2015-02-27  0.210  
##  3 AMD    2015-03-31 -0.138  
##  4 AMD    2015-04-30 -0.157  
##  5 AMD    2015-05-29  0.00885
##  6 AMD    2015-06-30  0.0526 
##  7 AMD    2015-07-31 -0.196  
##  8 AMD    2015-08-31 -0.0622 
##  9 AMD    2015-09-30 -0.0497 
## 10 AMD    2015-10-30  0.233  
## # ... with 830 more rows
#Computing monthly risk & return for each year
p.a <- multpl_stock_monthly_returns.a%>%
  mutate(year=year(date)) %>%
  group_by(symbol,year) %>%
  summarise(Return = mean(returns),
            Risk = sd(returns))
## `summarise()` has grouped output by 'symbol'. You can override using the
## `.groups` argument.
p.a
## # A tibble: 70 x 4
## # Groups:   symbol [14]
##    symbol  year   Return   Risk
##    <chr>  <dbl>    <dbl>  <dbl>
##  1 AAPL    2015 0.000113 0.0638
##  2 AAPL    2016 0.0125   0.0752
##  3 AAPL    2017 0.0352   0.0616
##  4 AAPL    2018 0.000159 0.102 
##  5 AAPL    2019 0.0560   0.0677
##  6 ADBE    2015 0.0232   0.0522
##  7 ADBE    2016 0.00876  0.0493
##  8 ADBE    2017 0.0469   0.0598
##  9 ADBE    2018 0.0239   0.0723
## 10 ADBE    2019 0.0331   0.0593
## # ... with 60 more rows
#Visualize monthly risk & return for each year
p.1 <- ggplot(p.a, aes(Risk, Return, color = symbol)) +
  geom_point(aes(frame = year, ids = symbol), size = 3) +
  scale_x_continuous(labels = scales::percent) +
  scale_y_continuous(labels = scales::percent) +
  theme(text=element_text(size = 16, family="Arial")) +
  ggtitle("Risk Return Plot By Calendar Year")
## Warning: Ignoring unknown aesthetics: frame, ids
ggplotly(p.1) %>% animation_opts(1000, easing="elastic", redraw = FALSE)
0%10%20%-5.0%0.0%5.0%10.0%
symbolAAPLADBEAMDAMZNDISETKOMSFTNFLXNKENVDAPEPSPYWMT~year: 201520152016201720182019Risk Return Plot By Calendar YearRiskReturnPlay
#Compute 5 year risk & return
p.b <- multpl_stock_monthly_returns.a %>%
  group_by(symbol) %>%
  summarise(Return = mean(returns),
            Risk = sd(returns))

#Visualize 5year risk & return
p.2 <- ggplot(p.b, aes(Risk, Return, color = symbol)) +
  geom_point(stat='identity', size = 4, show.legend  = TRUE)+
  scale_x_continuous(labels = scales::percent) +
  scale_y_continuous(labels = scales::percent) +
  theme(text=element_text(size = 16, family="Arial")) +
  ggtitle("Total Risk Return Plot")
ggplotly(p.2) %>% animation_opts(500, easing='elastic',redraw = FALSE)
5.0%10.0%15.0%0.0%2.0%4.0%6.0%
symbolAAPLADBEAMDAMZNDISETKOMSFTNFLXNKENVDAPEPSPYWMTTotal Risk Return PlotRiskReturn

Now, we can see the monthly risk & return, as well as the overall risk % return of the stock. By using ggplotly, we can directly see each stock’s performance figure and compare it with different stocks.

From the scatter plot, we can see that some stocks have lower risks but offer higher return. Thus, we will create a portfolio with, PEP, MSFT, AMZN, NVDA, and AMD. Before we go over any deep analysis, we will implement several basic quantitative analysis regarding stocks’ performance

#Create a portoflio with PEP, MSFT, AMZN, NCDA, AMD
symbols <- c('AMD','AMZN','MSFT', 'NVDA', 'PEP')
stocks.prices <-  tq_get(symbols,get  = "stock.prices",
                         from = "2015-01-01",
                         to = "2019-12-31") %>% group_by(symbol)
stocks.prices
## # A tibble: 6,285 x 8
## # Groups:   symbol [5]
##    symbol date        open  high   low close   volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
##  1 AMD    2015-01-02  2.67  2.67  2.67  2.67        0     2.67
##  2 AMD    2015-01-05  2.67  2.7   2.64  2.66  8878200     2.66
##  3 AMD    2015-01-06  2.65  2.66  2.55  2.63 13912500     2.63
##  4 AMD    2015-01-07  2.63  2.65  2.54  2.58 12377600     2.58
##  5 AMD    2015-01-08  2.59  2.65  2.56  2.61 11136600     2.61
##  6 AMD    2015-01-09  2.63  2.64  2.58  2.63  8907600     2.63
##  7 AMD    2015-01-12  2.62  2.64  2.55  2.63  9979600     2.63
##  8 AMD    2015-01-13  2.64  2.68  2.6   2.66 17907400     2.66
##  9 AMD    2015-01-14  2.6   2.66  2.58  2.63  9989900     2.63
## 10 AMD    2015-01-15  2.62  2.65  2.49  2.52 17744000     2.52
## # ... with 6,275 more rows
#Chart stock prices
p.3 <- stocks.prices %>%
  ggplot(aes(x = date, y = adjusted, color = symbol)) +
  geom_line() +
  ylab('Stock Prices') + xlab('Date') +
  ggtitle("Price chart for multiple stocks")
ggplotly(p.3)
201620182020050100150
symbolAMDAMZNMSFTNVDAPEPPrice chart for multiple stocksDateStock Prices
#Charting stock prices #2
stocks.prices %>%
  ggplot(aes(x = date, y = adjusted)) +
  geom_line() +
  facet_wrap(~symbol, scales = "free_y") +  # facet_wrap is used to make diff frames
  theme_tq() +       # using a new theme
  labs(x = "Date", y = "Price") +
  ggtitle("Price chart for stocks")

#Computing stock returns for our portfolio
multpl_stock_monthly_returns <- stocks.prices %>%
  group_by(symbol) %>%
  tq_transmute(select = adjusted,
               mutate_fun = periodReturn,
               period = 'monthly',
               col_rename = 'returns')

#Charting stock returns
multpl_stock_monthly_returns %>%
  select(symbol,date,returns) %>%
  ggplot(aes(x=date,y=returns,col=symbol)) +
  scale_y_continuous(labels = scales::percent,
                     breaks = seq(-0.5,0.75,0.1)) +
  geom_bar(stat='identity') +
  ggtitle('Monthly Return')+
  facet_wrap(~symbol)

#Charting monthly returns using box plot
p.4 <- multpl_stock_monthly_returns %>% 
  ggplot(aes(symbol, returns)) + 
  geom_boxplot(aes(color=symbol)) + 
  labs(x="symbols", y="returns")
ggplotly(p.4)
AMDAMZNMSFTNVDAPEP-0.4-0.20.00.20.4
symbolAMDAMZNMSFTNVDAPEPsymbolsreturns

As we can see from the return graphs, it seems that AMD has the highest volatility while PEP has the lowest volatility. Next, we are going to compute the monthly risk & return.

#Compare monthly mean returns for each stocks
p.5 <- multpl_stock_monthly_returns%>%
  mutate(year=year(date)) %>%
  group_by(symbol,year) %>%
  summarise(mean = mean(returns),
            sd = sd(returns)) %>%
  ggplot(aes(x = year, y = mean, fill = symbol)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7) +
  scale_y_continuous(breaks = seq(-0.1,0.4,0.02),
                     labels = scales::percent) +
  scale_x_continuous(breaks = seq(2012,2022,1)) +
  labs(x = "Year", y = "Mean Returns") +
  theme_tq() +
  theme(legend.position = "top") +
  scale_fill_brewer(palette="Paired") + theme_minimal() +
  ggtitle("Monthly Mean returns")
## `summarise()` has grouped output by 'symbol'. You can override using the
## `.groups` argument.
ggplotly(p.5)
20152016201720182019-2.0%0.0%2.0%4.0%6.0%8.0%10.0%12.0%14.0%
symbolAMDAMZNMSFTNVDAPEPMonthly Mean returnsYearMean Returns
#Compare monthly risk for each stock
p.6 <- multpl_stock_monthly_returns%>%
  mutate(year=year(date)) %>%
  group_by(symbol,year) %>%
  summarise(mean = mean(returns),
            sd = sd(returns)) %>%
  ggplot(aes(x = year, y = sd, fill = symbol)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7) +
  scale_y_continuous(breaks = seq(-0.1,0.4,0.02),
                     labels = scales::percent) +
  scale_x_continuous(breaks = seq(2012,2022,1)) +
  labs(x = "Year", y = "Std Dev") +
  theme_tq() +
  theme(legend.position = "top") +
  scale_fill_brewer(palette="Paired") + theme_minimal() +
  ggtitle("Monthly Std Dev")
## `summarise()` has grouped output by 'symbol'. You can override using the
## `.groups` argument.
ggplotly(p.6)
201520162017201820190.0%2.0%4.0%6.0%8.0%10.0%12.0%14.0%16.0%18.0%20.0%22.0%24.0%
symbolAMDAMZNMSFTNVDAPEPMonthly Std DevYearStd Dev

Apparently, AMD has the biggest volatility compared to other stocks while PEP showed the lowest risk among the 5 stocks. However, AMD’s return was significant as the risk was high.

Now, we are going to verify whether the return of the stock is normally distributed and also compute the correlation of each stock.

#Verifying normal distribution
multpl_stock_monthly_returns %>% 
  ggplot(aes(returns)) +
  geom_density(lwd=1) +
  geom_histogram(aes(y=..density..))+
  facet_wrap(~symbol) + xlab('return')
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Although some data looks messy, it seems that the returns are all normally distributed, and we can expect that the return of the portfolio will be noramally distributed too.

#Computing correlation
multpl_stock_monthly_returns %>%
  ungroup() %>%
  select(date,returns,symbol) %>%
  pivot_wider(id_cols = date, names_from = 'symbol', values_from = 'returns') %>%
  select(-date) %>%
  ggpairs

multpl_stock_monthly_returns %>%
  spread(symbol,value=returns) %>%
  tk_xts(silent = TRUE) %>%
  cor() %>%
  corrplot()

From the data, it seems that MSFT and AMZN has the highest correlation of 0.52. Beyond that, all of the stocks have statistically weak positive correlation, which would help diversify the risk of the portfolio.

  1. Construct a portfolio of the selected stocks using data from 2015M1 to 2019M12.
  1. Find the optimum weights using MPT.
  2. Graph the efficient frontier.
  3. Find the tangency point of the Capital Allocation Line (CAL) and the efficient frontier.
  4. Calculate the CAL equation and graph CAL and the efficient frontier on the same coordinate system

To construct an efficient frontier and apply MPT concepts, we have to convert the data into xts format first for calculation purpose.

#Compute log daily returns
log_ret_daily <- stocks.prices %>%
  group_by(symbol) %>%
  tq_transmute(select = adjusted,
               mutate_fun = periodReturn,
               period = 'daily',
               col_rename = 'returns',
               type = 'log')
head(log_ret_daily)
## # A tibble: 6 x 3
## # Groups:   symbol [1]
##   symbol date        returns
##   <chr>  <date>        <dbl>
## 1 AMD    2015-01-02  0      
## 2 AMD    2015-01-05 -0.00375
## 3 AMD    2015-01-06 -0.0113 
## 4 AMD    2015-01-07 -0.0192 
## 5 AMD    2015-01-08  0.0116 
## 6 AMD    2015-01-09  0.00763
#Convert tibble into xts
log_ret_xts <- log_ret_daily %>%
  spread(symbol, value = returns) %>%
  tk_xts()
## Warning: Non-numeric columns being dropped: date
## Using column `date` for date_var.
head(log_ret_xts)
##                     AMD         AMZN         MSFT         NVDA          PEP
## 2015-01-02  0.000000000  0.000000000  0.000000000  0.000000000  0.000000000
## 2015-01-05 -0.003752350 -0.020730712 -0.009238112 -0.017034845 -0.007546600
## 2015-01-06 -0.011342277 -0.023098034 -0.014786255 -0.030787308 -0.007603689
## 2015-01-07 -0.019194447  0.010543966  0.012625209 -0.002609274  0.028821501
## 2015-01-08  0.011560822  0.006812743  0.028994037  0.036927410  0.018011182
## 2015-01-09  0.007633625 -0.011818213 -0.008440650  0.004019935 -0.006793688

Now, the data is converted into time series format. To utilize MPT, we have to create matrix for return and risks

#Calculating mean return for each asset
mean_ret <- colMeans(log_ret_xts)
mean_ret
##          AMD         AMZN         MSFT         NVDA          PEP 
## 0.0022562238 0.0014236052 0.0010523651 0.0019749353 0.0004119731
#Create variance-covariance matrix
cov_mat <- cov(log_ret_xts)*252  #Annualize daily return 
cov_mat
##             AMD        AMZN       MSFT       NVDA         PEP
## AMD  0.39141219 0.051225796 0.03834009 0.11973625 0.012229482
## AMZN 0.05122580 0.083988444 0.04220953 0.05043909 0.009737129
## MSFT 0.03834009 0.042209528 0.05409649 0.04704958 0.012455068
## NVDA 0.11973625 0.050439090 0.04704958 0.17890260 0.010449014
## PEP  0.01222948 0.009737129 0.01245507 0.01044901 0.020950908

Below will be the basic assumptions and formula we will use create efficient frontier and define tangency portfolio

# Calculate the random weights
wts <- runif(n = length(symbols))
wts <- wts/sum(wts) # to normalize the weight into total of 1

# Calculate the portfolio returns
port_returns <- (sum(wts * mean_ret) + 1)^252 - 1

# Calculate the portfolio risk
port_risk <- sqrt(t(wts) %*% (cov_mat %*% wts))

#Define risk free rate
risk_free <- 0.0412

# Calculate the Sharpe Ratio
sharpe_ratio <- (port_returns-risk_free)/port_risk
num_port <- 5000

# Creating a matrix to store the weights

all_wts <- matrix(nrow = num_port,
                  ncol = length(symbols))

# Creating an empty vector to store portfolio returns

port_returns <- vector('numeric', length = num_port)

# Creating an empty vector to store portfolio standard deviation

port_risk <- vector('numeric', length = num_port)

# Creating an empty vector to store portfolio Sharpe ratio

sharpe_ratio <- vector('numeric', length = num_port)

for (i in seq_along(port_returns)) {
  
  wts <- runif(length(symbols))
  wts <- wts/sum(wts)
  
  # Storing weight in the matrix
  all_wts[i,] <- wts
  
  # Portfolio returns
  
  port_ret <- sum(wts * mean_ret)
  port_ret <- ((port_ret + 1)^252) - 1
  
  # Storing Portfolio Returns values
  port_returns[i] <- port_ret
  
  
  # Creating and storing portfolio risk
  port_sd <- sqrt(t(wts) %*% (cov_mat  %*% wts))
  port_risk[i] <- port_sd
  
  # Creating and storing Portfolio Sharpe Ratios
  # Assuming 4.12% Risk free rate
  
  sr <- (port_ret-risk_free)/port_sd
  sharpe_ratio[i] <- sr
  
}

# Storing the values in the table
portfolio_values <- tibble(Return = port_returns,
                  Risk = port_risk,
                  SharpeRatio = sharpe_ratio)

# Converting matrix to a tibble and changing column names
all_wts <- tk_tbl(all_wts)
## Warning in tk_tbl.data.frame(as.data.frame(data), preserve_index,
## rename_index, : Warning: No index to preserve. Object otherwise converted to
## tibble successfully.
colnames(all_wts) <- colnames(log_ret_xts)
portfolio_values <- tk_tbl(cbind(all_wts, portfolio_values))
## Warning in tk_tbl.data.frame(cbind(all_wts, portfolio_values)): Warning: No
## index to preserve. Object otherwise converted to tibble successfully.
head(portfolio_values)
## # A tibble: 6 x 8
##     AMD   AMZN   MSFT   NVDA    PEP Return  Risk SharpeRatio
##   <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl> <dbl>       <dbl>
## 1 0.237 0.419  0.223  0.0485 0.0725  0.456 0.264        1.57
## 2 0.133 0.0900 0.542  0.0415 0.193   0.340 0.207        1.44
## 3 0.636 0.107  0.0255 0.0817 0.150   0.588 0.431        1.27
## 4 0.253 0.297  0.174  0.134  0.142   0.459 0.265        1.58
## 5 0.459 0.206  0.113  0.200  0.0218  0.594 0.372        1.49
## 6 0.231 0.229  0.240  0.0846 0.216   0.407 0.239        1.53

Now, we will find out the GMV and tangency point portfolio. GMV will be the weights that will minimize variance while tangency portfolio will be the weights that will maximize the Sharpe ratio.

min_var <- portfolio_values[which.min(portfolio_values$Risk),]
max_sr <- portfolio_values[which.max(portfolio_values$SharpeRatio),]
#Compute GMV portoflio weights
GMV <- min_var %>%
  gather(AMD:PEP, key = Asset,
         value = Weights) %>%
  mutate(Asset = as.factor(Asset)) %>%
  ggplot(aes(x = fct_reorder(Asset,Weights), y = Weights, fill = Asset)) +
  geom_bar(stat = 'identity') +
  theme_minimal() +
  labs(x = 'Assets', y = 'Weights', title = "Minimum Variance Portfolio Weights") +
  scale_y_continuous(labels = scales::percent) 
ggplotly(GMV)
NVDAMSFTAMDAMZNPEP0%20%40%60%80%
AssetAMDAMZNMSFTNVDAPEPMinimum Variance Portfolio WeightsAssetsWeights

We can see that 75% of the stocks are allocated to PEP for GMV portfolio since it has the lowest risk.

Tangency <- max_sr %>%
  gather(AMD:PEP, key = Asset,
         value = Weights) %>%
  mutate(Asset = as.factor(Asset)) %>%
  ggplot(aes(x = fct_reorder(Asset,Weights), y = Weights, fill = Asset)) +
  geom_bar(stat = 'identity') +
  theme_minimal() +
  labs(x = 'Assets', y = 'Weights', title = "Tangency Portfolio Weights") +
  scale_y_continuous(labels = scales::percent) 

ggplotly(Tangency)
MSFTPEPAMDNVDAAMZN0.0%10.0%20.0%30.0%40.0%50.0%
AssetAMDAMZNMSFTNVDAPEPTangency Portfolio WeightsAssetsWeights

For tangency portoflio, we can see that most of the assets are allocated to AMD and NVDA since they both have a higher returns and higher risks. Since the Sharpe ratio = 1.69, risk free rate = 0.0412, the CAL is CAL = 0.0412 + 1.69*sigma(p)

EF <- portfolio_values %>%
  ggplot(aes(x = Risk, y = Return, color = SharpeRatio)) +
  geom_point() +
  theme_classic() +
  scale_y_continuous(labels = scales::percent) +
  scale_x_continuous(labels = scales::percent) +
  labs(x = 'Annualized Risk',
       y = 'Annualized Returns',
       title = "Portfolio Optimization & Efficient Frontier") +
  geom_point(aes(x = Risk,
                 y = Return), data = min_var, color = 'red',size=3) +
  geom_point(aes(x = Risk,
                 y = Return), data = max_sr, color = 'red', size=3) +
  annotate('text', x = 0.24, y = 0.60, label = "Tangency Portfolio") +
  annotate('text', x = 0.23, y = 0.14, label = "Minimum variance portfolio") +
  annotate('text', x = 0.40, y = 0.84, label = "Capital Allocation Line") +
  geom_abline(aes(
    intercept = 0.0412, 
    slope = 1.69 
  ))
ggplotly(EF)
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## Please use `gather()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
Tangency PortfolioMinimum variance portfolioCapital Allocation Line20.0%30.0%40.0%20%40%60%80%
0.91.11.31.5SharpeRatioPortfolio Optimization & Efficient FrontierAnnualized RiskAnnualized Returns

From the analysis, the optimum weight for AMD, AMZN, MSFT, NVDA, and PEP is 13%, 43%, 2%, 41%, and 1%.

Although these weights are going to be used for this project, we are going to add box constraint for minimum allocation of 5% and maximum allocation of 35% when it comes to building portfolio for GSBA 542 project which will be computed in the last page using PerformanceAnalytics package.

  1. Allocate $100.00 among the selected stocks using adjusted monthly closing prices at 2020. January 2020 will have a value of 100 as an index.
  1. Using the adjusted closing prices from 2020M1 to 2022M8 calculate the holding values of your portfolio (assume fixed holdings with no re-balancing taking place over time).

First, we will import new stock from 2020M1 to 2022M8 and then allocate each weight for the stocks

#Allocate weights for each stocks
#Import new stock data
wts.tp.a <- c(0.12,0.49,0.08,0.29,0.02)
stocks.prices.b <-  tq_get(symbols,get  = "stock.prices",from = "2020-01-01",to = "2022-8-31") %>%
  group_by(symbol)
head(stocks.prices.b)
## # A tibble: 6 x 8
## # Groups:   symbol [1]
##   symbol date        open  high   low close   volume adjusted
##   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
## 1 AMD    2020-01-02  46.9  49.2  46.6  49.1 80331100     49.1
## 2 AMD    2020-01-03  48.0  49.4  47.5  48.6 73127400     48.6
## 3 AMD    2020-01-06  48.0  48.9  47.9  48.4 47934900     48.4
## 4 AMD    2020-01-07  49.3  49.4  48.0  48.2 58061400     48.2
## 5 AMD    2020-01-08  47.8  48.3  47.1  47.8 53767000     47.8
## 6 AMD    2020-01-09  48.9  50.0  48.4  49.0 76512800     49.0
#Compute Return for the new stock data
multpl_stock_monthly_returns.b <- stocks.prices.b %>%
  group_by(symbol) %>%
  tq_transmute(select = adjusted,
               mutate_fun = periodReturn,
               period = 'monthly',
               col_rename = 'returns')
multpl_stock_monthly_returns.b
## # A tibble: 160 x 3
## # Groups:   symbol [5]
##    symbol date       returns
##    <chr>  <date>       <dbl>
##  1 AMD    2020-01-31 -0.0428
##  2 AMD    2020-02-28 -0.0323
##  3 AMD    2020-03-31  0     
##  4 AMD    2020-04-30  0.152 
##  5 AMD    2020-05-29  0.0269
##  6 AMD    2020-06-30 -0.0221
##  7 AMD    2020-07-31  0.472 
##  8 AMD    2020-08-31  0.173 
##  9 AMD    2020-09-30 -0.0972
## 10 AMD    2020-10-30 -0.0817
## # ... with 150 more rows
#Allocate the tangency portfolio weights for each stock
port_ret_tp.b <- multpl_stock_monthly_returns.b %>%
  tq_portfolio(assets_col = symbol,
               returns_col = returns,
               weights = wts.tp.a,
               col_rename = 'port_ret_tp',
               geometric = FALSE)
## Warning: `spread_()` was deprecated in tidyr 1.2.0.
## Please use `spread()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
port_ret_tp.b
## # A tibble: 32 x 2
##    date       port_ret_tp
##    <date>           <dbl>
##  1 2020-01-31     0.0249 
##  2 2020-02-28     0.00203
##  3 2020-03-31     0.00640
##  4 2020-04-30     0.194  
##  5 2020-05-29     0.0611 
##  6 2020-06-30     0.0904 
##  7 2020-07-31     0.164  
##  8 2020-08-31     0.149  
##  9 2020-09-30    -0.0566 
## 10 2020-10-30    -0.0524 
## # ... with 22 more rows
port_ret_tp.b %>%
  mutate(cr = cumprod(1 + port_ret_tp)) %>%
  ggplot(aes(x = date, y = cr*100)) +
  geom_line() +
  labs(x = 'Date',
       y = 'Index',
       title = 'Tangency Portoflio Index') +
  scale_y_continuous(breaks = seq(100,300,50)) +
  scale_x_date(date_breaks = 'year',
               date_labels = '%Y') 

As we can see, the index has increased from 100 to 200, showing 100% rate of return. Now, we will compare this index with the S&P 500.

#Import S&P 500
market.prices <-  tq_get("SPY",get  = "stock.prices",from = "2020-01-01",to = "2022-08-31")
market_return <- market.prices %>%
  tq_transmute(select = adjusted,
               mutate_fun = periodReturn,
               period = 'monthly',
               col_rename = 'market_ret')

#Chart S&P 500 Index from 100
market_return %>%
  mutate(cr = cumprod(1 + market_ret)) %>%
  ggplot(aes(x = date, y = cr*100)) +
  geom_line() +
  labs(x = 'Date',
       y = 'Index',
       title = 'S&P 500 Index') +
  scale_y_continuous(breaks = seq(100,200,50)) +
  scale_x_date(date_breaks = 'year',
               date_labels = '%Y')

#Chart both indexes in one graph
b <- port_ret_tp.b %>%
  mutate(cr = (cumprod(1 + port_ret_tp))*100) %>%
  mutate(symbol = 'TP')
b <- subset(b,select = -c(port_ret_tp))
c <- market_return %>%
  mutate(cr = (cumprod(1 + market_ret))*100) %>%
  mutate(symbol = 'Market')
c <- subset(c,select = -c(market_ret))
bc <- rbind(b,c)
p.7 <- bc %>%
  ggplot(aes(x=date,y=cr,color=symbol))+
  geom_line() +
  xlab('Date')+
  ylab('Index')+
  ggtitle("Portfolio vs S&P 500")
ggplotly(p.7)
202020212022100150200250
symbolMarketTPPortfolio vs S&P 500DateIndex

Apparently, the tangency portfolio offered higher return than the market while showing higher volatility recently.

  1. Estimate CAPM for your portfolio using data from 2020M1 to 2022M8 and graph the estimated Beta of the CAPM and the average return of your portfolio as a point relative to SML. Comment how your portfolio compares to S&P500.

First we will build the Security market line of portfolio. We will use the 10yr bond yield as the risk-free rate.

To build a SML, we need 1. Portfolio Return 2. Market Return 3. Risk-Free Rate

#Portfolio Return
head(port_ret_tp.b)
## # A tibble: 6 x 2
##   date       port_ret_tp
##   <date>           <dbl>
## 1 2020-01-31     0.0249 
## 2 2020-02-28     0.00203
## 3 2020-03-31     0.00640
## 4 2020-04-30     0.194  
## 5 2020-05-29     0.0611 
## 6 2020-06-30     0.0904
#Market Return
head(market_return)
## # A tibble: 6 x 2
##   date       market_ret
##   <date>          <dbl>
## 1 2020-01-31   -0.00967
## 2 2020-02-28   -0.0792 
## 3 2020-03-31   -0.125  
## 4 2020-04-30    0.127  
## 5 2020-05-29    0.0476 
## 6 2020-06-30    0.0177
#Import Risk-Free rate data
getSymbols("^TNX", 
           src = 'yahoo', 
           from = "2020-01-01", 
           to = "2022-08-31",
           auto.assign = TRUE,
           warnings = FALSE)
## Warning: ^TNX contains missing values. Some functions will not work if objects
## contain missing values in the middle of the series. Consider using na.omit(),
## na.approx(), na.fill(), etc to remove or replace them.
## [1] "^TNX"
head(TNX)
##            TNX.Open TNX.High TNX.Low TNX.Close TNX.Volume TNX.Adjusted
## 2020-01-02    1.903    1.903   1.851     1.882          0        1.882
## 2020-01-03    1.828    1.840   1.786     1.788          0        1.788
## 2020-01-05       NA       NA      NA        NA         NA           NA
## 2020-01-06    1.785    1.816   1.766     1.811          0        1.811
## 2020-01-07    1.797    1.828   1.797     1.827          0        1.827
## 2020-01-08    1.823    1.876   1.802     1.874          0        1.874

Since we are dealing with monthly return data, we are going to change the data structure into monthly data for computation purpose.

#Convert into monthly value
TNX <- TNX %>%
  to.monthly()
## Warning in to.period(x, "months", indexAt = indexAt, name = name, ...): missing
## values removed from data
TNX <- TNX[,6]
head(TNX)
##        ..Adjusted
## 1 2020      1.520
## 2 2020      1.127
## 3 2020      0.698
## 4 2020      0.622
## 5 2020      0.648
## 6 2020      0.653
#Converting into tibble
TNX <- as.tibble(TNX)
## Warning: `as.tibble()` was deprecated in tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
#Converting risk free rate into percentage and change column name
TNX <- TNX %>%
  mutate(rf = ..Adjusted/100)
TNX <- TNX %>%
  subset(select = -c(..Adjusted))
TNX
## # A tibble: 32 x 1
##         rf
##      <dbl>
##  1 0.0152 
##  2 0.0113 
##  3 0.00698
##  4 0.00622
##  5 0.00648
##  6 0.00653
##  7 0.00536
##  8 0.00693
##  9 0.00677
## 10 0.0086 
## # ... with 22 more rows

Now, we are going to create a new tibble that contains all the 3 essential information.

#Merge all the data
SML <- cbind(port_ret_tp.b,market_return,TNX)

#Drop one date column since it's duplicated
SML <- subset(SML,select = -c(date))
SML <- SML %>%
      relocate(date)
SML <- as.tibble(SML)
SML
## # A tibble: 32 x 4
##    date       port_ret_tp market_ret      rf
##    <date>           <dbl>      <dbl>   <dbl>
##  1 2020-01-31     0.0249    -0.00967 0.0152 
##  2 2020-02-28     0.00203   -0.0792  0.0113 
##  3 2020-03-31     0.00640   -0.125   0.00698
##  4 2020-04-30     0.194      0.127   0.00622
##  5 2020-05-29     0.0611     0.0476  0.00648
##  6 2020-06-30     0.0904     0.0177  0.00653
##  7 2020-07-31     0.164      0.0589  0.00536
##  8 2020-08-31     0.149      0.0698  0.00693
##  9 2020-09-30    -0.0566    -0.0374  0.00677
## 10 2020-10-30    -0.0524    -0.0249  0.0086 
## # ... with 22 more rows

Now, we have to create a new column that computes risk premium which equals to market return - risk free rate

#Compute risk premium
SML <- SML %>%
  mutate(risk_premium = market_ret - rf)
SML
## # A tibble: 32 x 5
##    date       port_ret_tp market_ret      rf risk_premium
##    <date>           <dbl>      <dbl>   <dbl>        <dbl>
##  1 2020-01-31     0.0249    -0.00967 0.0152       -0.0249
##  2 2020-02-28     0.00203   -0.0792  0.0113       -0.0904
##  3 2020-03-31     0.00640   -0.125   0.00698      -0.132 
##  4 2020-04-30     0.194      0.127   0.00622       0.121 
##  5 2020-05-29     0.0611     0.0476  0.00648       0.0412
##  6 2020-06-30     0.0904     0.0177  0.00653       0.0112
##  7 2020-07-31     0.164      0.0589  0.00536       0.0535
##  8 2020-08-31     0.149      0.0698  0.00693       0.0629
##  9 2020-09-30    -0.0566    -0.0374  0.00677      -0.0442
## 10 2020-10-30    -0.0524    -0.0249  0.0086       -0.0335
## # ... with 22 more rows

Remember that Security Market Line : E(R) = Beta*Risk premium + Rf. Since the data is computing monthly time series data, we will use the annualized market return and to avoid using negative figure for computing risk premium. Also, we will use average risk free rate to calculate the expected return of the portfolio.

A_rp <- mean(SML$market_ret+1)^12-1 - mean(SML$rf)
Ex_Beta <- tibble(Beta=seq(0,2,length.out=32), Ex_ret = (A_rp*Beta)/12+mean(SML$rf)/12)
Ex_Beta
## # A tibble: 32 x 2
##      Beta  Ex_ret
##     <dbl>   <dbl>
##  1 0      0.00124
##  2 0.0645 0.00179
##  3 0.129  0.00235
##  4 0.194  0.00290
##  5 0.258  0.00345
##  6 0.323  0.00400
##  7 0.387  0.00455
##  8 0.452  0.00510
##  9 0.516  0.00566
## 10 0.581  0.00621
## # ... with 22 more rows
Ex_Beta %>%
  ggplot(aes(y = Ex_ret, x = Beta)) + 
  geom_point(col='cornflowerblue', size=4) +
  xlab('Beta') + 
  ylab('Expected Return') + 
  geom_point(aes(x =1.25,  
                   y = mean(SML$port_ret_tp)), 
               color = "red", 
               size = 4) +
    annotate('text',x = 1.45, 
          y = mean(SML$port_ret_tp),label='Portfolio',size=5)+ 
  annotate('text',x = 1.6, y = 0.01,
          label = "E(R) = 0.1*Beta + 0.001",size=5) +
  ggtitle('Security Market Line') + 
  scale_y_continuous(labels = scales::percent)

The Security Market Line shows that our monthly return for the portfolio considering the Beta should be approximately 1.25%. However, our portfolio’s average monthly return is about 2.3% which means that the result is above the theoretical figure.

ggplot(data = SML, aes(y = port_ret_tp, x = risk_premium)) + 
  geom_point(col='cornflowerblue', size=4) +
  xlab('Risk Premium') + 
  ylab('Expected Return') + 
  ggtitle('Portfolio Return & Risk Premium') + 
  geom_smooth(method='lm', color = 'darkviolet', size =0.8) +
  annotate('text', x= 0.1,y=-0.1, label = 'Y=1.24X+0.03',size=5)+
  scale_y_continuous(labels = scales::percent) +
    scale_x_continuous(labels = scales::percent)
## `geom_smooth()` using formula 'y ~ x'

summary(lm(SML$port_ret_tp~SML$risk_premium))
## 
## Call:
## lm(formula = SML$port_ret_tp ~ SML$risk_premium)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.130373 -0.043364 -0.005446  0.041403  0.139203 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       0.03095    0.01234   2.508   0.0178 *  
## SML$risk_premium  1.24191    0.20345   6.104 1.04e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06952 on 30 degrees of freedom
## Multiple R-squared:  0.554,  Adjusted R-squared:  0.5391 
## F-statistic: 37.26 on 1 and 30 DF,  p-value: 1.042e-06

From the result above, we can see that the higher the risk premium, the higher the return of the portfolio.

Now, we are going to calculate the beta of our portfolio and compare the relationship between portfolio returns and market returns.

First, we are going to scatter plot the risk & return of the market, portfolio, and each stock to see the overall performance

#Merge market return and individual stock returns
spy_multpl_monthly_return <- market_return %>%
  mutate(symbol = 'SPY',
         returns = market_ret) %>%
  relocate(symbol) %>%
  subset(select = -c(market_ret)) %>%
  rbind(multpl_stock_monthly_returns.b)
spy_multpl_monthly_return
## # A tibble: 192 x 3
##    symbol date        returns
##    <chr>  <date>        <dbl>
##  1 SPY    2020-01-31 -0.00967
##  2 SPY    2020-02-28 -0.0792 
##  3 SPY    2020-03-31 -0.125  
##  4 SPY    2020-04-30  0.127  
##  5 SPY    2020-05-29  0.0476 
##  6 SPY    2020-06-30  0.0177 
##  7 SPY    2020-07-31  0.0589 
##  8 SPY    2020-08-31  0.0698 
##  9 SPY    2020-09-30 -0.0374 
## 10 SPY    2020-10-30 -0.0249 
## # ... with 182 more rows
p.8 <- spy_multpl_monthly_return %>% 
    group_by(symbol) %>% 
    summarise(expected_return = mean(returns),
              stand_dev = sd(returns)) %>% 
    ggplot(aes(x = stand_dev, y = expected_return, color = symbol)) +
    geom_point(size = 4) +
    geom_point(aes(x = sd(port_ret_tp.b$port_ret_tp),  
                   y = mean(port_ret_tp.b$port_ret_tp)), 
               color = "cornflowerblue", 
               size = 5) +
    geom_text(
      aes(x = sd(port_ret_tp.b$port_ret_tp) * 1.09, 
          y = mean(port_ret_tp.b$port_ret_tp), 
          label = "Portfolio")) +
  geom_text(
    aes(x= sd(market_return$market_ret) *1.14,
        y= mean(market_return$market_ret), label = 'Market'
  )) +
    ylab("expected return") +
    xlab("standard deviation") +
    ggtitle("Expected Monthly Returns v. Risk") +
    scale_y_continuous(labels = scales::percent) +
    scale_x_continuous(labels = scales::percent)
ggplotly(p.8)
PortfolioPortfolioPortfolioPortfolioPortfolioPortfolioMarketMarketMarketMarketMarketMarket5.0%7.5%10.0%12.5%15.0%1.0%2.0%3.0%4.0%
symbolAMDAMZNMSFTNVDAPEPSPYExpected Monthly Returns v. Riskstandard deviationexpected return

From the scatter plot, it is apparent that our portfolio has lower risk but higher risk than investing in AMZN due to the asset distribution effect.

Since we saw our portfolio risk & return, we are going to calculate the Beta, which indicates the volatility compared to S&P 500.

Refer that the formula for computing beta is : {beta}_{portfolio} = cov(R_p, R_m)/sigma_m) Or it is the slope of the regression line between market return and portfolio return

#Compute Beta #1
Beta_port <- cov(port_ret_tp.b$port_ret_tp,market_return$market_ret)/
  var(market_return$market_ret)
Beta_port
## [1] 1.257211
#Compute Beta #2
summary(lm(SML$port_ret_tp~SML$market_ret))
## 
## Call:
## lm(formula = SML$port_ret_tp ~ SML$market_ret)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.144818 -0.050078 -0.009622  0.043862  0.151114 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.01228    0.01286   0.955    0.347    
## SML$market_ret  1.25721    0.21889   5.743 2.86e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07184 on 30 degrees of freedom
## Multiple R-squared:  0.5237, Adjusted R-squared:  0.5078 
## F-statistic: 32.99 on 1 and 30 DF,  p-value: 2.862e-06

We can see that the Beta of the portfolio using both methods give the same figure. The beta of the portfolio 1.25, meaning that it is 25% more volatile than the market.

Now, we are going to draw the regression line for our CAPM model.

p.9 <- port_ret_tp.b %>% 
    mutate(market_returns = market_return$market_ret) %>% 
    ggplot(aes(x = market_returns, y = port_ret_tp)) +
    geom_point(size=4)+
    geom_point(color = "cornflowerblue",size = 3) +
    geom_smooth(method = "lm", se = FALSE, color = "red", size = 1) +
    ylab("portfolio returns") +
    xlab("market returns") +
    ggtitle("Regression Line of CAPM") +
  annotate('text', x= 0.1,y=-0.1, label = 'Y=1.25X+0.01(t=0.9)',size=5)+
  scale_y_continuous(labels = scales::percent) +
    scale_x_continuous(labels = scales::percent) 
ggplotly(p.9)
## `geom_smooth()` using formula 'y ~ x'
Y=1.25X+0.01(t=0.9)-10.0%-5.0%0.0%5.0%10.0%-20.0%-10.0%0.0%10.0%20.0%
Regression Line of CAPMmarket returnsportfolio returns
  1. Test whether the closing of the economy due to COVID-19 in March 2020 had any effect on Jensen alpha and the market risk of your CAPM model.

Jensen alpha is the difference between actual return and expected return from the CAPM model

SML$expected_return = SML$rf + SML$risk_premium * Beta_port
SML$Alpha = SML$port_ret_tp - SML$expected_return
datatable(SML)
t.test(SML$Alpha, mu=0.16)
## 
##  One Sample t-test
## 
## data:  SML$Alpha
## t = -11.61, df = 31, p-value = 8.082e-13
## alternative hypothesis: true mean is not equal to 0.16
## 95 percent confidence interval:
##  -0.009157552  0.041393214
## sample estimates:
##  mean of x 
## 0.01611783
t.test(SML$risk_premium, mu=-0.13)
## 
##  One Sample t-test
## 
## data:  SML$risk_premium
## t = 11.466, df = 31, p-value = 1.11e-12
## alternative hypothesis: true mean is not equal to -0.13
## 95 percent confidence interval:
##  -0.02774146  0.01651017
## sample estimates:
##    mean of x 
## -0.005615645
p.10 <- ggplot(data = SML, aes(x=date, y= Alpha)) + 
  geom_point(col='cornflowerblue', size=4) +
  xlab('Date') + 
  ylab('Jensen Alpha') + 
  geom_abline(aes(slope=0,intercept=0),color='red',size=1)+
  scale_y_continuous(labels = scales::percent) +
  scale_x_date(date_breaks="3 months", date_labels="%Y-%m")+
  annotate(geom = 'text',y=0.167,x=as.Date('2020-05-30'),label = 'COVID',color=c('#9900CC'),size=4.5) +
  ggtitle('Monthly Jensen Alpha')  
ggplotly(p.10)
COVID2020-032020-062020-092020-122021-032021-062021-092021-122022-032022-062022-09-10%0%10%
Monthly Jensen AlphaDateJensen Alpha
  1. Calculate CV, Sharpe, Treynor, and Sortino ratios for your portfolio and compare them to a similarly diversified portfolio of Vanguard, Fidelity, or any other similar portfolio.

A.Portoflio 1.CV of portoflio: SIGMA/MEAN

#Calculate CV
CV_p <- sd(port_ret_tp.b$port_ret_tp)/mean(port_ret_tp.b$port_ret_tp)
CV_p
## [1] 4.270936
  1. Sharpe Ratio of portfolio: (Rm-Rf)/SIGMA
SR_p <- (mean(port_ret_tp.b$port_ret_tp+1)^(32*12/36)-1-risk_free)/sd(port_ret_tp.b$port_ret_tp)
SR_p
## [1] 2.405553
  1. Treynor Ratio: (MEAN - R)/Beta
TR_p <- (mean(port_ret_tp.b$port_ret_tp+1)^(32*12/36)-1-risk_free)/Beta_port
TR_p
## [1] 0.1959266
  1. Sortino Ratio
SO_p <- SortinoRatio(port_ret_tp.b$port_ret_tp)
SO_p
##                               [,1]
## Sortino Ratio (MAR = 0%) 0.3953987

To compare with the portoflio, we will import ETF that has a similar beta to the portoflio

#Download BlackRock's semiconductor ETF
SOXX <-  tq_get('SOXX',get  = "stock.prices",from = "2020-01-01",
                           to = "2022-08-31") %>%
group_by(symbol)

SOXX_return <- SOXX %>%
  group_by(symbol) %>%
  tq_transmute(select = adjusted,
               mutate_fun = periodReturn,
               period = 'monthly',
               col_rename = 'returns',
               method = 'log')
head(SOXX_return)
## # A tibble: 6 x 3
## # Groups:   symbol [1]
##   symbol date       returns
##   <chr>  <date>       <dbl>
## 1 SOXX   2020-01-31 -0.0516
## 2 SOXX   2020-02-28 -0.0464
## 3 SOXX   2020-03-31 -0.112 
## 4 SOXX   2020-04-30  0.145 
## 5 SOXX   2020-05-29  0.0716
## 6 SOXX   2020-06-30  0.0778

B. SOXX

1.CV of SOXX: SIGMA/MEAN

CV_SOXX <- sd(SOXX_return$returns)/mean(SOXX_return$returns)
CV_SOXX
## [1] 5.279398
  1. Sharpe Ratio of portfolio: (Rm-Rf)/SIGMA
SR_SOXX <- (mean(SOXX_return$returns+1)^(32*12/36)-1-risk_free)/sd(SOXX_return$returns)
SR_SOXX
## [1] 1.699182
  1. Treynor Ratio: (MEAN - R)/Beta
Beta_SOXX <- cov(SOXX_return$returns,market_return$market_ret)/
  var(market_return$market_ret)
TR_SOXX <- (mean(SOXX_return$returns+1)^(32*12/36)-1-risk_free)/Beta_SOXX
TR_SOXX
## [1] 0.1186559
  1. Sortino Ratio
SO_SOXX <- SortinoRatio(SOXX_return$returns)
SO_SOXX
##                              [,1]
## Sortino Ratio (MAR = 0%) 0.294809

If we create a tibble and a graph to see the difference more clearly….

P_SOXX <- tibble(
  Symbol = c('Portoflio','Portoflio','Portoflio','Portoflio','SOXX','SOXX','SOXX','SOXX'),
  Statistics = c('CV','Sharpe','Treynor','Sortino','CV','Sharpe','Treynor','Sortino'),
           Values = c(CV_p,SR_p,TR_p,SO_p,CV_SOXX,SR_SOXX,TR_SOXX,SO_SOXX)) 
P_SOXX
## # A tibble: 8 x 3
##   Symbol    Statistics Values
##   <chr>     <chr>       <dbl>
## 1 Portoflio CV          4.27 
## 2 Portoflio Sharpe      2.41 
## 3 Portoflio Treynor     0.196
## 4 Portoflio Sortino     0.395
## 5 SOXX      CV          5.28 
## 6 SOXX      Sharpe      1.70 
## 7 SOXX      Treynor     0.119
## 8 SOXX      Sortino     0.295
P__SOXX_1 <- P_SOXX%>%
  ggplot(aes(x = Statistics, y = Values, fill = Symbol)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7) +
  labs(x = "Statistics", y = "Values") +
  theme_tq() +
  theme(legend.position = "top") +
  scale_fill_manual(values=c("#E69F00", "#56B4E9"))+
  ggtitle("Different Statistics Results")
ggplotly(P__SOXX_1)
CVSharpeSortinoTreynor024
SymbolPortoflioSOXXDifferent Statistics ResultsStatisticsValues
  1. Calculate 2% VaR as a percentage of the mean return of your portfolio when the risk horizon is one year, six months, and one month. Calculate the same VaRs for the selected portfolio in item 6. Compare the VaRs of your portfolio to the ones of the market portfolios.

Since VaR is calculating the density of the qunatile, we will use qnorm function to compute the 1 month, six months, and one year VaR.

#Setting one month VaR Values
alpha=0.02
mu_onemonths <- mean(port_ret_tp.b$port_ret_tp)
sd_onemonths <- sd(port_ret_tp.b$port_ret_tp)
VaR_Port_one <- qnorm(alpha, mean = mu_onemonths, sd = sd_onemonths)
VaR_Port_one
## [1] -0.1863222

The result implicates that about 2% of the time, the expected return of the portfolio could be worse than -18.6%.

#Setting 6 months VaR Values
mu_sixmonths <- mean(port_ret_tp.b$port_ret_tp)*6
sd_sixmonths <- sd(port_ret_tp.b$port_ret_tp)*sqrt(6)
VaR_Port_six <- qnorm(alpha, mean = mu_sixmonths, sd = sd_sixmonths)
VaR_Port_six
## [1] -0.3712699
#Setting 1 year VaR Values
mu_year <- mean(port_ret_tp.b$port_ret_tp)*12
sd_year <- sd(port_ret_tp.b$port_ret_tp)*sqrt(12)
VaR_Port_year <- qnorm(alpha, mean = mu_year, sd = sd_year)
VaR_Port_year
## [1] -0.4407885

Now we are going to do the same process for the ETF we imported at Q.6.

#Setting one month VaR for SOXX
mu_onemonths_SOXX <- mean(SOXX_return$returns)
sd_onemonths_SOXX <- sd(SOXX_return$returns)
SOXX_Port_one <- qnorm(alpha, mean = mu_onemonths_SOXX, sd = sd_onemonths_SOXX)
SOXX_Port_one
## [1] -0.158152
#Setting six month VaR for SOXX
mu_sixmonths_SOXX <- mean(SOXX_return$returns)*6
sd_sixmonths_SOXX <- sd(SOXX_return$returns)*sqrt(6)
VaR_SOXX_six <- qnorm(alpha, mean = mu_sixmonths_SOXX, sd = sd_sixmonths_SOXX)
VaR_SOXX_six
## [1] -0.3303415
#Setting one year VaR for SOXX
mu_year_SOXX <- mean(SOXX_return$returns)*12
sd_year_SOXX <- sd(SOXX_return$returns)*sqrt(12)
VaR_SOXX_year <- qnorm(alpha, mean = mu_year_SOXX, sd = sd_year_SOXX)
VaR_SOXX_year
## [1] -0.4106983

Since we have the different VaR results for the portoflio and SOXX, we are going to visualize it to see the difference more clearly.

VaR_P_SOXX <- tibble(
  Symbol = c('Portoflio','Portoflio','Portoflio','SOXX','SOXX','SOXX'),
  Statistics = c('30_VaR','180_VaR','365_VaR','30_VaR','180_VaR','365_VaR'),
           Values = abs(c(VaR_Port_one,VaR_Port_six,VaR_Port_year,SOXX_Port_one,VaR_SOXX_six,VaR_SOXX_year)))
VaR_P_SOXX
## # A tibble: 6 x 3
##   Symbol    Statistics Values
##   <chr>     <chr>       <dbl>
## 1 Portoflio 30_VaR      0.186
## 2 Portoflio 180_VaR     0.371
## 3 Portoflio 365_VaR     0.441
## 4 SOXX      30_VaR      0.158
## 5 SOXX      180_VaR     0.330
## 6 SOXX      365_VaR     0.411
P__SOXX_2 <- VaR_P_SOXX%>%
  mutate(Statistics = fct_reorder(Statistics,Values)) %>% # For reoredring data
  ggplot(aes(x = Statistics, y = Values, fill = Symbol)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7) +
  labs(x = "VaR", y = "Values") +
  theme_tq() +
  theme(legend.position = "top") +
  scale_fill_brewer(palette="Pastel1") + theme_minimal()+
  scale_y_continuous(labels = scales::percent)+
  ggtitle("VaR: Portoflio vs. SOXX")
ggplotly(P__SOXX_2)
30_VaR180_VaR365_VaR0.0%10.0%20.0%30.0%40.0%
SymbolPortoflioSOXXVaR: Portoflio vs. SOXXVaRValues
  1. Graph the scatter diagram of your portfolio and comment on trends, outliers, structural breaks and any other special features. Graph the scatter diagram of your portfolio and S&P 500 on the same coordinate system and compare the trends.
port_ret_tp.b %>% 
    mutate(market_returns = market_return$market_ret) %>% 
    ggplot(aes(x = market_returns, y = port_ret_tp)) +
    geom_point(size=4)+
    geom_point(color = "cornflowerblue",size = 3) +
    geom_smooth(method = "lm", se = FALSE, color = "red", size = 1) +
    ylab("portfolio returns") +
    xlab("market returns") +
    ggtitle("Regression Line of CAPM") +
  annotate('text', x= 0.1,y=-0.1, label = 'Y=1.25X+0.01(t=0.95)',size=5)+
  scale_y_continuous(labels = scales::percent) +
    scale_x_continuous(labels = scales::percent) 
## `geom_smooth()` using formula 'y ~ x'

summary(lm(SML$port_ret_tp~SML$market_ret))
## 
## Call:
## lm(formula = SML$port_ret_tp ~ SML$market_ret)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.144818 -0.050078 -0.009622  0.043862  0.151114 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.01228    0.01286   0.955    0.347    
## SML$market_ret  1.25721    0.21889   5.743 2.86e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07184 on 30 degrees of freedom
## Multiple R-squared:  0.5237, Adjusted R-squared:  0.5078 
## F-statistic: 32.99 on 1 and 30 DF,  p-value: 2.862e-06
  1. Using the CAPM equation of your portfolio do two periods ex-post forecasting of the returns to your portfolio and compare your forecast to the actual returns. Find the accuracy statistics of your forecast and report them.
x <- SML$risk_premium # This should x axis
y <- SML$port_ret_tp # This should be y axis
data <- data.frame(y,x)
data1 <- data[1:30,]
data2 <- data[31:32,]
reg <- lm(y~x, data= data1)
pred <- predict(reg, newdata = data2, se.fit = TRUE)
Expost_fit <- reg$fitted.values
Expost_fit[31:32]=NA
EXPOST <- tibble(date=SML$date,return=SML$port_ret_tp,symbol='Actual')
EXPOST1 <- tibble(date = SML$date,return = Expost_fit, symbol = 'Ex_Post')
EXPOST2 <- rbind(EXPOST,EXPOST1)

p.11 <- na.omit(EXPOST2) %>%
  ggplot(aes(x=date,y=return,color=symbol)) +
  geom_line(stat='identity') +
  geom_line(size=1.5) +
  xlab('Date') +
  ylab('Portoflio Return') +
  scale_y_continuous(labels = scales::percent) +
  scale_x_date(date_breaks="6 months", date_labels="%Y-%m")+
  scale_color_manual(values=c("#E69F00", "#56B4E9"))+
  ggtitle('Actual vs Ex-Post Forecast')
ggplotly(p.11)
2020-062020-122021-062021-122022-06-20.0%-10.0%0.0%10.0%20.0%
symbolActualEx_PostActual vs Ex-Post ForecastDatePortoflio Return
mae_post <- mae(c(EXPOST$return[1:30]),c(na.omit(EXPOST1$return)))*100
mape_post <- mape(c(EXPOST$return[1:30]),c(na.omit(EXPOST1$return)))
rmse_post <- rmse(c(EXPOST$return[1:30]),c(na.omit(EXPOST1$return)))*100
mase_post <- mase(c(EXPOST$return[1:30]),c(na.omit(EXPOST1$return)))*100
Results_post <- tibble(Method = c('EX_POST_F'),
       MAE = c(mae_post),
       MAPE = c(mape_post),
       RMSE = c(rmse_post),
       MASE = c(mase_post))

datatable(Results_post)
  1. Using CAPM equation do two-periods ex-ante forecasting of returns to your portfolio assuming that the monthly risk premiums to S&P 500 will be 1.10% in 2022M9 and .5% in 2022M10.
#Return = Beta * Risk Premium
reg2 <- lm(y~x, data=data)
fore <- forecast(reg2, newdata = data.frame(x=c(0.05,0.11)),se,fit=TRUE)
fore
##   Point Forecast         Lo 80     Hi 80       Lo 95     Hi 95
## 1     0.09304497 -0.0006416754 0.1867316 -0.05296500 0.2390549
## 2     0.16755964  0.0700537175 0.2650656  0.01559735 0.3195219

The data above shows the expected return of M9 and M10, respectively 16% and 9%.

#Graph ex-ante forecast
autoplot(fore) +
  theme_gray()+
  labs(title='Ex-Ante Forecast',x='Risk Premium',y='Portoflio Return')+
  scale_y_continuous(labels = scales::percent)+
  scale_x_continuous(labels = scales::percent) 

  1. Do a naïve forecasting of the your portfolio index, MA(15), and Exponential Smoothing of your portfolio indices for the period 2021M1to 2022M10. Find the accuracy statistics of your forecasts. Compare the forecasting accuracy criterion for the three forecasting methods. Which one results in a better forecasting outcome?
#Compute stock return from 2022 M1 to 2022 M10
stocks.prices.c <-  tq_get(symbols,get  = "stock.prices",from = "2021-01-01",to = "2022-10-31") %>% group_by(symbol)
multpl_stock_monthly_returns.c <- stocks.prices.c %>%
  group_by(symbol) %>%
  tq_transmute(select = adjusted,
               mutate_fun = periodReturn,
               period = 'monthly',
               col_rename = 'returns')

port_ret_tp.c <- multpl_stock_monthly_returns.c %>%
  tq_portfolio(assets_col = symbol,
               returns_col = returns,
               weights = wts.tp.a,
               col_rename = 'port_ret_tp',
               geometric = FALSE)

#Create New Index form 2021 to 2022
Port_Index <- port_ret_tp.c %>%
  mutate(Index = cumprod(1 + port_ret_tp)*100,
         symbol = 'Portfolio') %>%
  subset(select = -c(port_ret_tp))
  
Port_Index %>%
  ggplot(aes(x = date, y = Index)) +
  geom_line() +
  labs(x = 'Date',
       y = 'Index',
       title = 'Tangency Portoflio Index from 2021 to 2022') +
  scale_y_continuous(breaks = seq(40,140,20)) 

#Generate naive forecast
naive <- c(NA, Port_Index$Index[-length(Port_Index$Index)])
naive
##  [1]        NA  99.57905  99.23397  97.96785 108.34630 106.93577 120.61340
##  [8] 120.33012 129.23969 121.19533 135.34424 154.01327 144.65458 125.88950
## [15] 128.06949 135.00662 102.21607 102.92346  87.90634 107.83914  97.42861
## [22]  82.81401
Naive_df <- tibble(date=Port_Index$date,
       Index = naive,
       symbol = 'Naive')
#Plot Naive Forecast
Naive_Port_Index <- rbind(Port_Index,Naive_df)
p.12 <- na.omit(Naive_Port_Index) %>%
  ggplot(aes(x=date,y=Index,color=symbol))+
  geom_line(size=1.5) +
  ggtitle("Actual & Naive Forecast")
ggplotly(p.12)
2021-012021-072022-012022-0780100120140
symbolNaivePortfolioActual & Naive ForecastdateIndex
#Compute measure statistics for naive forecast 
mae_naive <- mae(c(Port_Index$Index[-1]),c(na.omit(Naive_df$Index)))
mape_naive <- mape(c(Port_Index$Index[-1]),c(na.omit(Naive_df$Index)))*100
rmse_naive <- rmse(c(Port_Index$Index[-1]),c(na.omit(Naive_df$Index)))
mase_naive <- mase(c(Port_Index$Index[-1]),c(na.omit(Naive_df$Index)))
#Plot MA(15) of the Index
Port_Index %>%
  ggplot(aes(x = date, y = Index)) +
  geom_line(color='cornflowerblue',size=1) +
  labs(x = 'Date',
       y = 'Index',
       title = 'Tangency Portoflio Index MA(15)') +
  scale_y_continuous(breaks = seq(40,140,20)) +
  geom_ma(ma_fun = SMA, n = 15,color='red',size = 1.4) 

MA_Port <- SMA(Port_Index$Index,15)
Port_Index$Index[15:22]
## [1] 135.00662 102.21607 102.92346  87.90634 107.83914  97.42861  82.81401
## [8]  82.78881
mae_ma15 <- mae(c(Port_Index$Index[15:22]),c(na.omit(MA_Port)))
mape_ma15 <- mape(c(Port_Index$Index[15:22]),c(na.omit(MA_Port)))*100
rmse_ma15 <- rmse(c(Port_Index$Index[15:22]),c(na.omit(MA_Port)))
mase_ma15 <- mase(c(Port_Index$Index[15:22]),c(na.omit(MA_Port)))
#Exponential Smoothing
Port_Index_xts <- Port_Index %>%
  tk_xts()
## Warning: Non-numeric columns being dropped: date, symbol
## Using column `date` for date_var.
fit1 <- ses(Port_Index_xts, alpha=0.2, initial="simple", h=3)
fit2 <- ses(Port_Index_xts, alpha=0.6, initial="simple", h=3)
fit3 <- ses(Port_Index_xts, h=3)

autoplot(fit1) +
    autolayer(fitted(fit1),size = 1.2, series = 'Alpha = 0.2')+
    autolayer(fitted(fit2), size = 1.2,series = 'Alpha = 0.6')+
    autolayer(fitted(fit3), size = 1.2,series = 'Alpha = 0.814')+
    autolayer(fit1$mean, color = 'red',size = 1.2) +
    autolayer(fit2$mean, color = 'green',size =1.2) +
    autolayer(fit3$mean, color = 'orange', size = 1.2)+
    theme_minimal()+
    ylab('Index')

mae_exp <- mae(c(Port_Index$Index),c(fit3[["fitted"]]))
mape_exp <- mape(c(Port_Index$Index),c(fit3[["fitted"]]))*100
rmse_exp <- rmse(c(Port_Index$Index),c(fit3[["fitted"]]))
mase_exp <- mase(c(Port_Index$Index),c(fit3[["fitted"]]))
Results <- tibble(Method = c('Naive','MA(15)','Exponential'),
       MAE = c(mae_naive,mae_ma15,mae_exp),
       MAPE = c(mape_naive,mape_ma15,mape_exp),
       RMSE = c(rmse_naive,rmse_ma15,rmse_exp),
       MASE = c(mase_naive,mase_ma15,mase_exp))

datatable(Results)

All of the Statistics tell that Exponential Smoothing method had the lowest error and MA(15) had the highest error measure.


From here, We are going to implement some codes just for fun. First we are going to use the PerformanceAnalytics package to find optimal weights for GMV portfolio. However, we are going to allocate at least 10% of weight to each stock and constraint the maximum weight to 40%.

#Using Performance Analytics library to implement efficient frointer
#Changing data into xts
returns <- multpl_stock_monthly_returns.b %>%
  select(symbol,date,returns) %>%
  pivot_wider(names_from = symbol, values_from = returns) %>%
  tk_xts(silent = TRUE) * 100
returns
##                   AMD         AMZN       MSFT        NVDA         PEP
## 2020-01-31  -4.276982   5.83295154  5.9830766  -1.4505474  4.56485571
## 2020-02-28  -3.234043  -6.22137200 -4.5688194  14.2966381 -7.03422284
## 2020-03-31   0.000000   3.50205707 -2.6541554  -2.3956772 -8.42162702
## 2020-04-30  15.193489  26.89001190 13.6326187  10.8801396 10.14988619
## 2020-05-29   2.691353  -1.27849397  2.5391302  21.4656992 -0.55937996
## 2020-06-30  -2.211892  12.95667241 11.0559190   7.0597169  1.32010872
## 2020-07-31  47.177340  14.71136274  0.7370708  11.7606705  4.08288133
## 2020-08-31  17.293039   9.04610295 10.2752045  25.9992156  1.74341818
## 2020-09-30  -9.722530  -8.75785906 -6.7396834   1.1966521 -0.33083377
## 2020-10-30  -8.171725  -3.57540866 -3.7369780  -7.3648433 -3.83117331
## 2020-11-30  23.070797   4.34398710  6.0060411   6.9211804  8.20767334
## 2020-12-31  -1.025259   2.80583843  3.9005923  -2.5567389  3.55439090
## 2021-01-29  -6.618689  -1.55760121  4.2891924  -0.4998090 -7.90963490
## 2021-02-26  -1.319473  -3.53284141  0.4117907   5.5794025 -5.40382508
## 2021-03-31  -7.111587   0.03717834  1.4588154  -2.6369516 10.36413183
## 2021-04-30   3.974526  12.06627340  6.9601698  12.4454426  1.91588214
## 2021-05-28  -1.886794  -7.04702566 -0.7627354   8.2281277  2.62209074
## 2021-06-30  17.295202   6.73549926  8.4988820  23.1622109  0.88718165
## 2021-07-30  13.052275  -3.27222869  5.1716513  -2.5171827  5.92562692
## 2021-08-31   4.265937   4.30341716  6.1591213  14.8209920 -0.35680013
## 2021-09-30  -7.062860  -5.35181082 -6.6118972  -7.4558805 -3.16455684
## 2021-10-29  16.841594   2.66024586 17.6290959  23.4166745  7.43967770
## 2021-11-30  31.722524   3.99236975 -0.1282171  27.8053712 -1.12623244
## 2021-12-31  -9.136832  -4.92519682  1.7332757  -9.9810210  9.45288143
## 2022-01-31 -20.604583 -10.28299065 -7.5344887 -16.7454326 -0.10937750
## 2022-02-28   7.956233   2.66725185 -3.7211995  -0.4124775 -5.63623348
## 2022-03-31 -11.350738   6.14372847  3.1861710  11.9157041  2.89536276
## 2022-04-29 -21.785257 -23.75250938 -9.9866999 -32.0274032  2.58693237
## 2022-05-31  19.106647  -3.27643208 -1.8077133   0.6739691 -2.30621900
## 2022-06-30 -24.926369 -11.64592120 -5.5320686 -18.7971101  0.04124567
## 2022-07-29  23.538642  27.05959728  9.3096684  19.8166144  4.98020220
## 2022-08-30  -7.970783  -4.60911533 -6.1308723 -14.8378611 -1.12597093
p <- portfolio.spec(colnames(returns))
p.long <- p %>% add.constraint(type='long_only')

p.box.mvp <- p.long %>%
  add.constraint(type='full_investment') %>%
  add.objective(type='risk',name='StdDev',risk_aversion = 9999) %>%
  add.objective(type='return',name='mean') %>%
  add.constraint(type='box', min=0.1,max=0.4)

M.box.mvp <- optimize.portfolio(returns, p.box.mvp, optimize_method  = 'ROI', 
                            trace = TRUE)
## Registered S3 method overwritten by 'ROI':
##   method           from              
##   print.constraint PortfolioAnalytics
Eff <- extractEfficientFrontier(M.box.mvp, match.col = "StdDev", n.portfolios = 50,
                               risk_aversion = NULL)
## Warning: executing %dopar% sequentially: no parallel backend registered
chart.Weights(M.box.mvp,plot.type = 'barplot')

By utilizing Box constraints, we found out that the weights of the assets are w = (0.1,0.1,0.3,0.1,0.4). Now we are going to compare the performance of the portfolio utilizing the new weights

stocks.prices.c <-  tq_get(symbols,get  = "stock.prices",from = "2020-01-01",to = "2022-8-31") %>%
  group_by(symbol)

multpl_stock_monthly_returns.c <- stocks.prices.c %>%
  group_by(symbol) %>%
  tq_transmute(select = adjusted,
               mutate_fun = periodReturn,
               period = 'monthly',
               col_rename = 'returns')
multpl_stock_monthly_returns.c
## # A tibble: 160 x 3
## # Groups:   symbol [5]
##    symbol date       returns
##    <chr>  <date>       <dbl>
##  1 AMD    2020-01-31 -0.0428
##  2 AMD    2020-02-28 -0.0323
##  3 AMD    2020-03-31  0     
##  4 AMD    2020-04-30  0.152 
##  5 AMD    2020-05-29  0.0269
##  6 AMD    2020-06-30 -0.0221
##  7 AMD    2020-07-31  0.472 
##  8 AMD    2020-08-31  0.173 
##  9 AMD    2020-09-30 -0.0972
## 10 AMD    2020-10-30 -0.0817
## # ... with 150 more rows
wts.gmv <- c(0.1,0.1,0.3,0.1,0.4)
port_ret_tp.c <- multpl_stock_monthly_returns.c %>%
  tq_portfolio(assets_col = symbol,
               returns_col = returns,
               weights = wts.gmv,
               col_rename = 'port_ret_GMV',
               geometric = FALSE)
port_ret_tp.c %>%
  mutate(cr = cumprod(1 + port_ret_GMV)) %>%
  ggplot(aes(x = date, y = cr*100)) +
  geom_line() +
  labs(x = 'Date',
       y = 'Index',
       title = 'GMV Portoflio Index') +
  scale_y_continuous(breaks = seq(100,300,50)) +
  scale_x_date(date_breaks = 'year',
               date_labels = '%Y')

market.prices <-  tq_get("SPY",get  = "stock.prices",from = "2020-01-01",to = "2022-08-31")
market_return <- market.prices %>%
  tq_transmute(select = adjusted,
               mutate_fun = periodReturn,
               period = 'monthly',
               col_rename = 'market_ret')

#Chart S&P 500 Index from 100
market_return %>%
  mutate(cr = cumprod(1 + market_ret)) %>%
  ggplot(aes(x = date, y = cr*100)) +
  geom_line() +
  labs(x = 'Date',
       y = 'Index',
       title = 'S&P 500 Index') +
  scale_y_continuous(breaks = seq(100,200,50)) +
  scale_x_date(date_breaks = 'year',
               date_labels = '%Y')

b.a <- port_ret_tp.c %>%
  mutate(cr = (cumprod(1 + port_ret_GMV))*100) %>%
  mutate(symbol = 'GMV')
b.a <- subset(b.a,select = -c(port_ret_GMV))

bc.a <- rbind(b.a,c)
p.13 <- bc.a %>%
  ggplot(aes(x=date,y=cr,color=symbol))+
  geom_line() +
  xlab('Date')+
  ylab('Index')+
  ggtitle("New Portfolio vs S&P 500")
ggplotly(p.13)
20202021202280120160200
symbolGMVMarketNew Portfolio vs S&P 500DateIndex

Now, we are going to implement a two-sample mean test and varaince analysis to find whether the risk and return of portoflio and S&P 500 is indifferent or not.

axcx <- left_join(market_return,port_ret_tp.c, by='date') %>%
  tk_xts()
## Warning: Non-numeric columns being dropped: date
## Using column `date` for date_var.
#ANOVA 
var.test(x=market_return$market_ret,y=port_ret_tp.c$port_ret_GMV, conf.level = 0.95)
## 
##  F test to compare two variances
## 
## data:  market_return$market_ret and port_ret_tp.c$port_ret_GMV
## F = 0.90667, num df = 31, denom df = 31, p-value = 0.7868
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.4425841 1.8573877
## sample estimates:
## ratio of variances 
##          0.9066699
t.test(x=market_return$market_ret,y=port_ret_tp.c$port_ret_GMV, conf.level = 0.95)
## 
##  Welch Two Sample t-test
## 
## data:  market_return$market_ret and port_ret_tp.c$port_ret_GMV
## t = -0.60034, df = 61.852, p-value = 0.5505
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.03927701  0.02113474
## sample estimates:
##   mean of x   mean of y 
## 0.009301853 0.018372988

Unfortunately, there wasn’t any statistical difference in risk & return in investing in two different assets; thus investing in GMV portfolio is statistically same as investing in S&P 500 index

`